#!/usr/bin/env python
# coding=utf-8
# __author__ = 'Yunchao Ling'

import csv
import sys

from pymongo import MongoClient
# from bson.son import SON

# Split the rows of a quoted CSV file (path given as the first command-line
# argument) according to how many documents in the MongoDB collection have an
# "NlmId" equal to the row's sixth column:
#   * exactly one match -> row copied to "<input>.pmcfind"
#   * no match          -> row copied to "<input>.pmcmiss"
#   * several matches   -> only counted
# Rows whose NLM ID is empty or absent are only included in the total.
# Four counters are printed at the end: missing, found, multiple, total.

# MongoDB connection settings.
SERVER = 'localhost'
PORT = 27017
DB_NAME = "journal"
COLLECTION_NAME = "journal"

# Zero-based index of the column that holds the NLM ID.
NLMID_COLUMN = 5

if len(sys.argv) < 2:
    # Fail with a readable message instead of a bare IndexError.
    sys.exit("usage: %s <input.csv>" % sys.argv[0])

input_path = sys.argv[1]

total_count = 0
find_count = 0
missing_count = 0
multi_count = 0

connection = MongoClient(SERVER, PORT)
try:
    db = connection[DB_NAME]
    collection = db[COLLECTION_NAME]

    # The with-statement closes (and therefore flushes) all three files even
    # when a lookup or a write raises, so no per-row flush is needed.
    with open(input_path, "r") as infile, \
            open(input_path + ".pmcfind", "w") as outfile1, \
            open(input_path + ".pmcmiss", "w") as outfile2:
        infile.readline()  # the first line is skipped (treated as a header)

        for line in infile:
            total_count += 1
            line = line.rstrip()

            # Parse the row with the csv module so that quoting, embedded
            # delimiters and escaped quotes are handled correctly.
            fields = next(csv.reader([line]), [])
            if len(fields) <= NLMID_COLUMN:
                # Blank or truncated row: nothing to look up.
                continue

            nlmid = fields[NLMID_COLUMN].strip()
            if nlmid == "":
                continue

            # Only "0", "1" or "more than 1" matters, so fetch at most two
            # ids. This avoids Cursor.count(), which newer PyMongo releases
            # no longer provide.
            cursor = collection.find({"NlmId": nlmid}, {"_id": 1}).limit(2)
            matches = sum(1 for _ in cursor)

            if matches == 0:
                missing_count += 1
                outfile2.write(line + "\n")
            elif matches == 1:
                find_count += 1
                outfile1.write(line + "\n")
            else:
                multi_count += 1
finally:
    connection.close()

# Single-argument print calls behave the same on Python 2 and Python 3.
print(missing_count)
print(find_count)
print(multi_count)
print(total_count)
